Allstate Kaggle

Allstate Kaggle EDA (https://www.kaggle.com/c/allstate-claims-severity). The training set has 188318 rows and 131 variables (excluding the loss variable): id + 116 categorical + 14 continuous variables.

The test set has 125546 rows. The combined data set has 313864 rows.

Possible Insights: - All the continuous values are between 0 and 1 (the categorical variables are letter-coded factors)

Summaries

# Per-column overview of the training set: level counts for the factor
# (cat*) columns and min/quartiles/mean/max for id, cont* and loss.
summary(train)
##        id         cat1       cat2       cat3       cat4       cat5      
##  Min.   :     1   A:141550   A:106721   A:177993   A:128395   A:123737  
##  1st Qu.:147748   B: 46768   B: 81597   B: 10325   B: 59923   B: 64581  
##  Median :294540                                                         
##  Mean   :294136                                                         
##  3rd Qu.:440680                                                         
##  Max.   :587633                                                         
##                                                                         
##  cat6       cat7       cat8       cat9       cat10      cat11     
##  A:131693   A:183744   A:177274   A:113122   A:160213   A:168186  
##  B: 56625   B:  4574   B: 11044   B: 75196   B: 28105   B: 20132  
##                                                                   
##                                                                   
##                                                                   
##                                                                   
##                                                                   
##  cat12      cat13      cat14      cat15      cat16      cat17     
##  A:159825   A:168851   A:186041   A:188284   A:181843   A:187009  
##  B: 28493   B: 19467   B:  2277   B:    34   B:  6475   B:  1309  
##                                                                   
##                                                                   
##                                                                   
##                                                                   
##                                                                   
##  cat18      cat19      cat20      cat21      cat22      cat23     
##  A:187331   A:186510   A:188114   A:187905   A:188275   A:157445  
##  B:   987   B:  1808   B:   204   B:   413   B:    43   B: 30873  
##                                                                   
##                                                                   
##                                                                   
##                                                                   
##                                                                   
##  cat24      cat25      cat26      cat27      cat28      cat29     
##  A:181977   A:169969   A:177119   A:168250   A:180938   A:184593  
##  B:  6341   B: 18349   B: 11199   B: 20068   B:  7380   B:  3725  
##                                                                   
##                                                                   
##                                                                   
##                                                                   
##                                                                   
##  cat30      cat31      cat32      cat33      cat34      cat35     
##  A:184760   A:182980   A:187107   A:187361   A:187734   A:188105  
##  B:  3558   B:  5338   B:  1211   B:   957   B:   584   B:   213  
##                                                                   
##                                                                   
##                                                                   
##                                                                   
##                                                                   
##  cat36      cat37      cat38      cat39      cat40      cat41     
##  A:156313   A:165729   A:169323   A:183393   A:180119   A:181177  
##  B: 32005   B: 22589   B: 18995   B:  4925   B:  8199   B:  7141  
##                                                                   
##                                                                   
##                                                                   
##                                                                   
##                                                                   
##  cat42      cat43      cat44      cat45      cat46      cat47     
##  A:186623   A:184110   A:172716   A:183991   A:187436   A:187617  
##  B:  1695   B:  4208   B: 15602   B:  4327   B:   882   B:   701  
##                                                                   
##                                                                   
##                                                                   
##                                                                   
##                                                                   
##  cat48      cat49      cat50      cat51      cat52      cat53     
##  A:188049   A:179127   A:137611   A:187071   A:179505   A:172949  
##  B:   269   B:  9191   B: 50707   B:  1247   B:  8813   B: 15369  
##                                                                   
##                                                                   
##                                                                   
##                                                                   
##                                                                   
##  cat54      cat55      cat56      cat57      cat58      cat59     
##  A:183762   A:188173   A:188136   A:185296   A:188079   A:188018  
##  B:  4556   B:   145   B:   182   B:  3022   B:   239   B:   300  
##                                                                   
##                                                                   
##                                                                   
##                                                                   
##                                                                   
##  cat60      cat61      cat62      cat63      cat64      cat65     
##  A:187872   A:187596   A:188273   A:188239   A:188271   A:186056  
##  B:   446   B:   722   B:    45   B:    79   B:    47   B:  2262  
##                                                                   
##                                                                   
##                                                                   
##                                                                   
##                                                                   
##  cat66      cat67      cat68      cat69      cat70      cat71     
##  A:179982   A:187626   A:188176   A:188011   A:188295   A:178646  
##  B:  8336   B:   692   B:   142   B:   307   B:    23   B:  9672  
##                                                                   
##                                                                   
##                                                                   
##                                                                   
##                                                                   
##  cat72      cat73      cat74      cat75      cat76      cat77     
##  A:118322   A:154275   A:184731   A:154307   A:181347   A:    49  
##  B: 69996   B: 34017   B:  3561   B: 34010   B:  6183   B:   358  
##             C:    26   C:    26   C:     1   C:   788   C:   408  
##                                                         D:187503  
##                                                                   
##                                                                   
##                                                                   
##  cat78      cat79      cat80      cat81      cat82      cat83     
##  A:   788   A:  7064   A:   783   A:   788   A: 19322   A: 26038  
##  B:186526   B:152929   B: 46538   B: 24132   B:147536   B:141534  
##  C:   645   C:  1668   C:  3492   C:  9013   C:  2655   C:  4958  
##  D:   359   D: 26657   D:137505   D:154385   D: 18805   D: 15788  
##                                                                   
##                                                                   
##                                                                   
##  cat84      cat85      cat86      cat87      cat88          cat89       
##  A: 29450   A:   788   A:  1589   A:   788   A:168926   A      :183744  
##  B:   431   B:186005   B:103852   B:166992   B:     7   B      :  4312  
##  C:154939   C:  1011   C: 10290   C:  8819   D: 19302   C      :   220  
##  D:  3498   D:   514   D: 72587   D: 11719   E:    83   D      :    33  
##                                                         E      :     5  
##                                                         I      :     2  
##                                                         (Other):     2  
##  cat90          cat91        cat92      cat93      cat94      cat95    
##  A:177993   A      :111028   A:124689   A:   432   A:   738   A: 3736  
##  B:  9515   B      : 42630   B:   628   B:  1133   B: 51710   B:  109  
##  C:   728   G      : 26734   C:    62   C: 35788   C: 13623   C:87531  
##  D:    70   C      :  6400   D:    11   D:150237   D:121642   D:79525  
##  E:     6   D      :  1149   F:     1   E:   728   E:    91   E:17417  
##  F:     4   E      :   254   H: 62901              F:   494            
##  G:     2   (Other):   123   I:    26              G:    20            
##      cat96        cat97     cat98          cat99           cat100     
##  E      :174360   A:41970   A:105492   P      :79455   F      :42970  
##  D      :  7922   B:   34   B:   542   T      :72591   I      :39933  
##  B      :  2957   C:78127   C: 21485   R      :10290   L      :19961  
##  G      :  2665   D: 3779   D: 50557   D      : 8844   K      :13817  
##  F      :   343   E:47450   E: 10242   S      : 7045   G      :12935  
##  A      :    35   F:  213              N      : 2894   J      :12027  
##  (Other):    36   G:16745              (Other): 7199   (Other):46675  
##      cat101           cat102           cat103           cat104     
##  A      :106721   A      :177274   A      :123737   E      :42925  
##  D      : 17171   B      :  5155   B      : 33342   G      :40660  
##  C      : 16971   C      :  4929   C      : 16508   D      :27611  
##  G      : 10944   E      :   482   D      :  7806   F      :19228  
##  F      : 10139   D      :   449   E      :  4473   H      :17187  
##  J      :  7259   G      :    15   F      :  1528   K      :14297  
##  (Other): 19113   (Other):    14   (Other):   924   (Other):26410  
##      cat105          cat106          cat107          cat108     
##  E      :76493   G      :47165   F      :47310   B      :65512  
##  F      :62892   H      :37713   G      :28560   K      :42435  
##  G      :20613   F      :36143   H      :23461   G      :21421  
##  D      :12172   I      :21433   J      :22405   D      :19160  
##  H      :11258   J      :18281   K      :20236   F      :10242  
##  I      : 2941   E      :13000   I      :20066   A      : 9299  
##  (Other): 1949   (Other):14583   (Other):26280   (Other):20249  
##      cat109           cat110          cat111           cat112     
##  BI     :152918   CL     :25305   A      :128395   E      :25148  
##  AB     : 21933   EG     :24654   C      : 32401   AH     :18639  
##  BU     :  3142   CS     :24592   E      : 14682   AS     :17669  
##  K      :  2999   EB     :21396   G      :  7039   J      :16222  
##  G      :  1353   CO     :17495   I      :  3578   AF     : 9368  
##  BQ     :  1067   BT     :16365   K      :  1353   AN     : 9138  
##  (Other):  4906   (Other):58511   (Other):   870   (Other):92134  
##      cat113          cat114           cat115          cat116      
##  BM     :26191   A      :131693   K      :43866   HK     : 21061  
##  AE     :22030   C      : 16793   O      :26813   DJ     : 20244  
##  L      :13058   E      : 16475   J      :23895   CK     : 10162  
##  AX     :12661   J      :  8199   N      :22438   DP     :  9202  
##  Y      :11374   F      :  7905   P      :21538   GS     :  8736  
##  K      : 7738   N      :  2455   L      :16125   CR     :  6862  
##  (Other):95266   (Other):  4798   (Other):33643   (Other):112051  
##      cont1              cont2              cont3              cont4       
##  Min.   :0.000016   Min.   :0.001149   Min.   :0.002634   Min.   :0.1769  
##  1st Qu.:0.346090   1st Qu.:0.358319   1st Qu.:0.336963   1st Qu.:0.3274  
##  Median :0.475784   Median :0.555782   Median :0.527991   Median :0.4529  
##  Mean   :0.493861   Mean   :0.507188   Mean   :0.498918   Mean   :0.4918  
##  3rd Qu.:0.623912   3rd Qu.:0.681761   3rd Qu.:0.634224   3rd Qu.:0.6521  
##  Max.   :0.984975   Max.   :0.862654   Max.   :0.944251   Max.   :0.9543  
##                                                                           
##      cont5            cont6             cont7            cont8       
##  Min.   :0.2811   Min.   :0.01268   Min.   :0.0695   Min.   :0.2369  
##  1st Qu.:0.2811   1st Qu.:0.33610   1st Qu.:0.3502   1st Qu.:0.3128  
##  Median :0.4223   Median :0.44094   Median :0.4383   Median :0.4411  
##  Mean   :0.4874   Mean   :0.49094   Mean   :0.4850   Mean   :0.4864  
##  3rd Qu.:0.6433   3rd Qu.:0.65502   3rd Qu.:0.5910   3rd Qu.:0.6236  
##  Max.   :0.9837   Max.   :0.99716   Max.   :1.0000   Max.   :0.9802  
##                                                                      
##      cont9             cont10           cont11            cont12       
##  Min.   :0.00008   Min.   :0.0000   Min.   :0.03532   Min.   :0.03623  
##  1st Qu.:0.35897   1st Qu.:0.3646   1st Qu.:0.31096   1st Qu.:0.31166  
##  Median :0.44145   Median :0.4612   Median :0.45720   Median :0.46229  
##  Mean   :0.48551   Mean   :0.4981   Mean   :0.49351   Mean   :0.49315  
##  3rd Qu.:0.56682   3rd Qu.:0.6146   3rd Qu.:0.67892   3rd Qu.:0.67576  
##  Max.   :0.99540   Max.   :0.9950   Max.   :0.99874   Max.   :0.99848  
##                                                                        
##      cont13             cont14            loss          
##  Min.   :0.000228   Min.   :0.1797   Min.   :     0.67  
##  1st Qu.:0.315758   1st Qu.:0.2946   1st Qu.:  1204.46  
##  Median :0.363547   Median :0.4074   Median :  2115.57  
##  Mean   :0.493138   Mean   :0.4957   Mean   :  3037.34  
##  3rd Qu.:0.689974   3rd Qu.:0.7246   3rd Qu.:  3864.05  
##  Max.   :0.988494   Max.   :0.8448   Max.   :121012.25  
## 
# Same per-column overview for the test set, which has no loss column.
summary(test)
##        id         cat1      cat2      cat3       cat4      cat5     
##  Min.   :     4   A:94096   A:71203   A:118752   A:86026   A:82282  
##  1st Qu.:146414   B:31450   B:54343   B:  6794   B:39520   B:43264  
##  Median :294306                                                     
##  Mean   :294067                                                     
##  3rd Qu.:441800                                                     
##  Max.   :587634                                                     
##                                                                     
##  cat6      cat7       cat8       cat9      cat10      cat11     
##  A:88014   A:122546   A:118112   A:75509   A:106944   A:112470  
##  B:37532   B:  3000   B:  7434   B:50037   B: 18602   B: 13076  
##                                                                 
##                                                                 
##                                                                 
##                                                                 
##                                                                 
##  cat12      cat13      cat14      cat15      cat16      cat17     
##  A:106777   A:112604   A:123954   A:125523   A:121262   A:124666  
##  B: 18769   B: 12942   B:  1592   B:    23   B:  4284   B:   880  
##                                                                   
##                                                                   
##                                                                   
##                                                                   
##                                                                   
##  cat18      cat19      cat20      cat21      cat22      cat23     
##  A:124902   A:124376   A:125434   A:125291   A:125519   A:105272  
##  B:   644   B:  1170   B:   112   B:   255   B:    27   B: 20274  
##                                                                   
##                                                                   
##                                                                   
##                                                                   
##                                                                   
##  cat24      cat25      cat26      cat27      cat28      cat29     
##  A:121430   A:113405   A:118077   A:112239   A:120751   A:123093  
##  B:  4116   B: 12141   B:  7469   B: 13307   B:  4795   B:  2453  
##                                                                   
##                                                                   
##                                                                   
##                                                                   
##                                                                   
##  cat30      cat31      cat32      cat33      cat34      cat35     
##  A:123247   A:122061   A:124723   A:124914   A:125184   A:125417  
##  B:  2299   B:  3485   B:   823   B:   632   B:   362   B:   129  
##                                                                   
##                                                                   
##                                                                   
##                                                                   
##                                                                   
##  cat36      cat37      cat38      cat39      cat40      cat41     
##  A:104035   A:110512   A:112774   A:122170   A:120081   A:120840  
##  B: 21511   B: 15034   B: 12772   B:  3376   B:  5465   B:  4706  
##                                                                   
##                                                                   
##                                                                   
##                                                                   
##                                                                   
##  cat42      cat43      cat44      cat45      cat46      cat47     
##  A:124343   A:122811   A:114985   A:122647   A:124972   A:125055  
##  B:  1203   B:  2735   B: 10561   B:  2899   B:   574   B:   491  
##                                                                   
##                                                                   
##                                                                   
##                                                                   
##                                                                   
##  cat48      cat49      cat50     cat51      cat52      cat53     
##  A:125366   A:119495   A:91888   A:124761   A:119761   A:115388  
##  B:   180   B:  6051   B:33658   B:   785   B:  5785   B: 10158  
##                                                                  
##                                                                  
##                                                                  
##                                                                  
##                                                                  
##  cat54      cat55      cat56      cat57      cat58      cat59     
##  A:122577   A:125449   A:125435   A:123560   A:125393   A:125340  
##  B:  2969   B:    97   B:   111   B:  1986   B:   153   B:   206  
##                                                                   
##                                                                   
##                                                                   
##                                                                   
##                                                                   
##  cat60      cat61      cat62      cat63      cat64      cat65     
##  A:125289   A:125024   A:125525   A:125501   A:125524   A:124021  
##  B:   257   B:   522   B:    21   B:    45   B:    22   B:  1525  
##                                                                   
##                                                                   
##                                                                   
##                                                                   
##                                                                   
##  cat66      cat67      cat68      cat69      cat70      cat71     
##  A:119930   A:125112   A:125437   A:125351   A:125526   A:119187  
##  B:  5616   B:   434   B:   109   B:   195   B:    20   B:  6359  
##                                                                   
##                                                                   
##                                                                   
##                                                                   
##                                                                   
##  cat72     cat73      cat74      cat75      cat76      cat77     
##  A:79486   A:102595   A:123055   A:102828   A:120866   A:    34  
##  B:46060   B: 22928   B:  2468   B: 22716   B:  4125   B:   264  
##            C:    23   C:    23   C:     2   C:   555   C:   272  
##                                                        D:124976  
##                                                                  
##                                                                  
##                                                                  
##  cat78      cat79      cat80     cat81      cat82     cat83     cat84     
##  A:   558   A:  4677   A:  552   A:   558   A:13026   A:17389   A: 19802  
##  B:124325   B:102007   B:30736   B: 15829   B:98004   B:94109   B:   283  
##  C:   445   C:  1168   C: 2419   C:  6245   C: 1735   C: 3452   C:103199  
##  D:   218   D: 17694   D:91839   D:102914   D:12781   D:10596   D:  2262  
##                                                                           
##                                                                           
##                                                                           
##  cat85      cat86     cat87      cat88          cat89        cat90     
##  A:   558   A: 1126   A:   558   A:112427   A      :122546   A:118752  
##  B:123963   B:68647   B:111306   B:    12   B      :  2832   B:  6277  
##  C:   699   C: 6755   C:  5874   D: 13037   C      :   141   C:   467  
##  D:   326   D:49018   D:  7808   E:    70   D      :    20   D:    42  
##                                             E      :     2   E:     5  
##                                             F      :     2   F:     3  
##                                             (Other):     3             
##      cat91           cat92       cat93      cat94     cat95    
##  A      :73787   A      :83053   A:   308   A:  487   A: 2470  
##  B      :28501   H      :41969   B:   782   B:34558   B:   63  
##  G      :17857   B      :  448   C: 23948   C: 9190   C:58836  
##  C      : 4369   C      :   48   D:100031   D:80907   D:52693  
##  D      :  785   I      :   23   E:   477   E:   59   E:11484  
##  E      :  148   D      :    2              F:  333            
##  (Other):   99   (Other):    3              G:   12            
##      cat96        cat97     cat98         cat99           cat100     
##  E      :116162   A:28475   A:69800   P      :52645   F      :28847  
##  D      :  5414   B:   24   B:  382   T      :49028   I      :26686  
##  B      :  1901   C:51813   C:14509   R      : 6755   L      :13277  
##  G      :  1803   D: 2520   D:34126   D      : 5739   K      : 9035  
##  F      :   229   E:31665   E: 6729   S      : 4669   G      : 8639  
##  A      :    19   F:  133             N      : 1877   J      : 7969  
##  (Other):    18   G:10916             (Other): 4833   (Other):31093  
##      cat101      cat102         cat103          cat104     
##  A      :71203   A:118112   A      :82282   E      :28632  
##  D      :11648   B:  3505   B      :22152   G      :26966  
##  C      :11244   C:  3305   C      :11284   D      :18250  
##  G      : 7239   D:   291   D      : 5180   F      :12841  
##  F      : 6882   E:   310   E      : 3062   H      :11275  
##  J      : 4833   F:     7   F      :  978   K      : 9934  
##  (Other):12497   G:    16   (Other):  608   (Other):17648  
##      cat105          cat106          cat107          cat108     
##  E      :50984   G      :31015   F      :31553   B      :43219  
##  F      :41920   H      :24884   G      :18665   K      :28723  
##  G      :13682   F      :24377   H      :15683   G      :14478  
##  D      : 8173   I      :14441   J      :15025   D      :12788  
##  H      : 7505   J      :12092   K      :13672   F      : 6729  
##  I      : 1926   E      : 8871   I      :13343   A      : 6118  
##  (Other): 1356   (Other): 9866   (Other):17605   (Other):13491  
##      cat109           cat110          cat111          cat112     
##  BI     :102134   CL     :16792   A      :86026   E      :16961  
##  AB     : 14356   CS     :16404   C      :21323   AH     :12510  
##  BU     :  2174   EG     :16231   E      : 9793   AS     :11641  
##  K      :  1957   EB     :14569   G      : 4560   J      :10852  
##  G      :   911   CO     :11721   I      : 2473   AN     : 6182  
##  BQ     :   721   BT     :10902   K      :  824   AF     : 6130  
##  (Other):  3293   (Other):38927   (Other):  547   (Other):61270  
##      cat113          cat114          cat115          cat116     
##  BM     :17565   A      :88014   K      :29161   HK     :14015  
##  AE     :14885   E      :10994   O      :17933   DJ     :13730  
##  L      : 8780   C      :10953   J      :15731   CK     : 6685  
##  AX     : 8393   F      : 5534   N      :14984   DP     : 6185  
##  Y      : 7772   J      : 5371   P      :14830   GS     : 5913  
##  K      : 5047   N      : 1642   L      :10518   CR     : 4549  
##  (Other):63104   (Other): 3038   (Other):22389   (Other):74469  
##      cont1              cont2              cont3              cont4       
##  Min.   :0.000016   Min.   :0.001149   Min.   :0.002634   Min.   :0.1769  
##  1st Qu.:0.347403   1st Qu.:0.358319   1st Qu.:0.336963   1st Qu.:0.3274  
##  Median :0.475784   Median :0.555782   Median :0.527991   Median :0.4529  
##  Mean   :0.494447   Mean   :0.506939   Mean   :0.498255   Mean   :0.4923  
##  3rd Qu.:0.626630   3rd Qu.:0.681761   3rd Qu.:0.634224   3rd Qu.:0.6521  
##  Max.   :0.984975   Max.   :0.862654   Max.   :0.944251   Max.   :0.9560  
##                                                                           
##      cont5            cont6             cont7            cont8       
##  Min.   :0.2811   Min.   :0.01268   Min.   :0.0695   Min.   :0.2369  
##  1st Qu.:0.2811   1st Qu.:0.33610   1st Qu.:0.3521   1st Qu.:0.3180  
##  Median :0.4223   Median :0.44153   Median :0.4389   Median :0.4411  
##  Mean   :0.4876   Mean   :0.49219   Mean   :0.4859   Mean   :0.4874  
##  3rd Qu.:0.6433   3rd Qu.:0.65926   3rd Qu.:0.5913   3rd Qu.:0.6292  
##  Max.   :0.9831   Max.   :0.99716   Max.   :1.0000   Max.   :0.9828  
##                                                                      
##      cont9             cont10           cont11            cont12       
##  Min.   :0.00008   Min.   :0.0000   Min.   :0.03532   Min.   :0.03623  
##  1st Qu.:0.35897   1st Qu.:0.3646   1st Qu.:0.31096   1st Qu.:0.31825  
##  Median :0.44145   Median :0.4667   Median :0.45720   Median :0.46229  
##  Mean   :0.48602   Mean   :0.4989   Mean   :0.49436   Mean   :0.49403  
##  3rd Qu.:0.56889   3rd Qu.:0.6198   3rd Qu.:0.67892   3rd Qu.:0.68241  
##  Max.   :0.99540   Max.   :0.9950   Max.   :0.99783   Max.   :0.99742  
##                                                                        
##      cont13             cont14      
##  Min.   :0.000228   Min.   :0.1786  
##  1st Qu.:0.315758   1st Qu.:0.2948  
##  Median :0.363547   Median :0.4061  
##  Mean   :0.495086   Mean   :0.4956  
##  3rd Qu.:0.689974   3rd Qu.:0.7248  
##  Max.   :0.988494   Max.   :0.8448  
## 

Plot of Losses Distribution

Plot Correlations of Categorical Features

##   [1] "id"     "cat1"   "cat2"   "cat3"   "cat4"   "cat5"   "cat6"  
##   [8] "cat7"   "cat8"   "cat9"   "cat10"  "cat11"  "cat12"  "cat13" 
##  [15] "cat14"  "cat15"  "cat16"  "cat17"  "cat18"  "cat19"  "cat20" 
##  [22] "cat21"  "cat22"  "cat23"  "cat24"  "cat25"  "cat26"  "cat27" 
##  [29] "cat28"  "cat29"  "cat30"  "cat31"  "cat32"  "cat33"  "cat34" 
##  [36] "cat35"  "cat36"  "cat37"  "cat38"  "cat39"  "cat40"  "cat41" 
##  [43] "cat42"  "cat43"  "cat44"  "cat45"  "cat46"  "cat47"  "cat48" 
##  [50] "cat49"  "cat50"  "cat51"  "cat52"  "cat53"  "cat54"  "cat55" 
##  [57] "cat56"  "cat57"  "cat58"  "cat59"  "cat60"  "cat61"  "cat62" 
##  [64] "cat63"  "cat64"  "cat65"  "cat66"  "cat67"  "cat68"  "cat69" 
##  [71] "cat70"  "cat71"  "cat72"  "cat73"  "cat74"  "cat75"  "cat76" 
##  [78] "cat77"  "cat78"  "cat79"  "cat80"  "cat81"  "cat82"  "cat83" 
##  [85] "cat84"  "cat85"  "cat86"  "cat87"  "cat88"  "cat89"  "cat90" 
##  [92] "cat91"  "cat92"  "cat93"  "cat94"  "cat95"  "cat96"  "cat97" 
##  [99] "cat98"  "cat99"  "cat100" "cat101" "cat102" "cat103" "cat104"
## [106] "cat105" "cat106" "cat107" "cat108" "cat109" "cat110" "cat111"
## [113] "cat112" "cat113" "cat114" "cat115" "cat116" "cont1"  "cont2" 
## [120] "cont3"  "cont4"  "cont5"  "cont6"  "cont7"  "cont8"  "cont9" 
## [127] "cont10" "cont11" "cont12" "cont13" "cont14" "loss"

cat116

# Number of training rows per cat116 level, most frequent level first.
cat116_analysis <- train %>%
  group_by(cat116) %>%
  summarise(count = n()) %>%
  arrange(desc(count))
datatable(cat116_analysis)

# Mean loss per cat116 level, largest mean first.
cat116_analysis2 <- train %>%
  group_by(cat116) %>%
  summarise(mean = mean(loss)) %>%
  arrange(desc(mean))
datatable(cat116_analysis2)

cat112

## cat112 has 51 levels; tabulate how many training rows fall in each one.
table(train$cat112)
## 
##     A    AA    AB    AC    AD    AE    AF    AG    AH    AI    AJ    AK 
##  2411  1241   246   454  1531   834  9368  1331 18639  4749   144  6726 
##    AL    AM    AN    AO    AP    AQ    AR    AS    AT    AU    AV    AW 
##  1130  1170  9138   534  4000    30  2365 17669  1272   434  7122  3145 
##    AX    AY     B    BA     C     D     E     F     G     H     I     J 
##  1074  1414   423   190  2257  1645 25148  3149  3168   548   940 16222 
##     K     L     M     N     O     P     Q     R     S     T     U     V 
##  6059   493   439  8453  2183   406   793  1123  4201   521  8356   693 
##     W     X     Y 
##   461   925  1351

Correlation Plot

# Pearson correlation matrix of the 14 continuous predictors and the loss
# target. Columns are picked by name rather than by position (118:132) so the
# selection does not silently shift if a column is added, dropped or reordered.
M <- train %>%
  select(cont1:cont14, loss) %>%
  cor(method = "pearson")

# Mixed correlogram with circles in the upper triangle; variables are
# reordered by hierarchical clustering.
corrplot.mixed(M, upper = "circle", order = "hclust")

Preprocess Predictors with near zero-variance

# Build a caret pre-processing recipe using the "nzv" (near-zero-variance)
# method, then print the recipe. The original line used typographic quotes
# and fused both statements on one line, which does not parse as R.
preProc <- preProcess(train, method = "nzv")
preProc

PCA